patch: optimize selects by extracting exclusive branches#205
Closed
Andersama wants to merge 1 commit intohanickadot:mainfrom
Closed
patch: optimize selects by extracting exclusive branches#205Andersama wants to merge 1 commit intohanickadot:mainfrom
Andersama wants to merge 1 commit intohanickadot:mainfrom
Conversation
6586a49 to
8060195
Compare
Contributor
Author
|
@hanickadot have you experimented with creating a custom vtable for the select expression? E.g., something like: namespace detail {
// Stamps `val` into every slot of `table` whose index is accepted by
// T::match_char, leaving all other slots untouched.
//
// table: lookup table indexed by character value; a slot holding 0 is unclaimed.
// atom:  tag object, only used to deduce T.
// val:   marker written into every slot T accepts.
//
// Returns true when at least one accepted slot had already been claimed
// (was non-zero) before this call, i.e. this atom overlaps an earlier one.
template<typename AlphabetType, size_t N, typename T>
constexpr bool overwrite_alphabet(std::array<AlphabetType, N> &table, [[maybe_unused]] T atom, uint8_t val) {
	bool has_collision = false;
	for (size_t i = 0; i < table.size(); i++) {
		if (!T::match_char(i)) {
			continue;
		}
		// accumulate: a plain assignment here would report only the last slot's state
		has_collision |= (table[i] != 0);
		table[i] = val;
	}
	return has_collision;
}
// Overload for a list of atoms: a slot is stamped with `val` when ANY of the
// Ts accepts its index through Ts::match_char. An empty list accepts nothing.
//
// table: lookup table indexed by character value; a slot holding 0 is unclaimed.
// atom:  tag object, only used to deduce Ts.
// val:   marker written into every accepted slot.
//
// Returns true when at least one accepted slot had already been claimed
// (was non-zero) before this call.
template<typename AlphabetType, size_t N, typename... Ts>
constexpr bool overwrite_alphabet(std::array<AlphabetType, N> &table, [[maybe_unused]] ctll::list<Ts...> atom, uint8_t val) {
	bool has_collision = false;
	for (size_t i = 0; i < table.size(); i++) {
		const bool accepted = ((Ts::match_char(i)) || ...);
		if (!accepted) {
			continue;
		}
		// accumulate: a plain assignment here would report only the last slot's state
		has_collision |= (table[i] != 0);
		table[i] = val;
	}
	return has_collision;
}
template<typename AlphabetType, size_t N, typename... Ts>
constexpr auto write_vtable_cases(ctll::list<Ts...> atoms) {
std::array<AlphabetType, N> table{};
int dummy;
bool has_collision = false;
size_t idx = sizeof...(Ts);
//see foonathan's nifty fold expressions
//reverse order of overwrite_alphabet, make sure indexs count down*
(dummy = ... = ((has_collision |= overwrite_alphabet(table, Ts{}, idx--)), 0));
size_t count_nonzero = 0;
for (size_t i = 0; i < table.size(); i++) {
count_nonzero += table[i] != 0;
//now we shift all the indexs over by one and make room for fail state
table[i] = table[i] != 0 ? table[i] - 1 : sizeof...(Ts); //last index is reject state
}
return std::make_tuple(count_nonzero, has_collision, table);
}
// Adapter with a uniform five-argument signature: the pattern to evaluate is
// fixed by the ListAst template argument instead of a trailing tag parameter,
// so differently-typed patterns can be called through one signature.
template<typename ListAst, typename R, typename Iterator, typename EndIterator>
constexpr auto evaluate_wrapped(
	const Iterator begin,
	Iterator current,
	const EndIterator last,
	const flags & f,
	R captures
) {
	// forward everything unchanged and append a default-constructed ListAst tag
	return evaluate(begin, current, last, f, captures, ListAst());
}
}
// matching select in patterns
template <typename R, typename Iterator, typename EndIterator, typename HeadOptions, typename... TailOptions, typename... Tail>
constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, const EndIterator last, const flags & f, R captures, ctll::list<select<HeadOptions, TailOptions...>, Tail...>) noexcept {
if constexpr (sizeof...(TailOptions) > 0 && is_random_accessible(typename std::iterator_traits<Iterator>::iterator_category{})) {
constexpr auto vtable_tuple = detail::write_vtable_cases<uint8_t,256>(ctll::list<decltype(calculate_first(sequence<HeadOptions, Tail...>{})), decltype(calculate_first(sequence<TailOptions, Tail...>{}))...>{});
constexpr auto vtable_cases = std::get<2>(vtable_tuple);
constexpr auto has_collisions = std::get<1>(vtable_tuple);
constexpr std::array<R(*)(const Iterator, Iterator, const EndIterator, const flags&, R), sizeof...(TailOptions) + 2> vtable = {
detail::evaluate_wrapped<sequence<HeadOptions, Tail...>, R, Iterator, EndIterator>,
detail::evaluate_wrapped<sequence<TailOptions, Tail...>, R, Iterator, EndIterator>...,
detail::evaluate_wrapped<sequence<reject>, R, Iterator, EndIterator>
};
uint8_t case_num = current != last ? vtable_cases[*current] : (vtable.size()-1);
if constexpr (has_collisions) {
for (size_t i = case_num; i < vtable.size(); i++) {
if (auto r = vtable[i](begin, current, last, f, captures)) {
return r;
}
}
} else {
return vtable[case_num](begin, current, last, f, captures);
}
return not_matched;
} else {
if (auto r = evaluate(begin, current, last, f, captures, ctll::list<HeadOptions, Tail...>())) {
return r;
} else {
return evaluate(begin, current, last, f, captures, ctll::list<select<TailOptions...>, Tail...>());
}
}
}So far as I can tell, with larger select expresions this cuts down on compile time, msvc performs better when the character table is made static (compile error with clang), but clang appears to do far better. For example in MSVC I can compile static constexpr auto real_lexer_pattern = ctll::fixed_string{
// groups 1-3: whitespace run, `//` comment up to end of line, `/* ... */` comment
"(\\s++)|(//[^\r\n]*+)|(/[*](?:[^*]++|[*][^\\x2F])+[*]/)|"
// group 4: identifier
// NOTE(review): '_' is accepted only as the first character, not in the rest -- confirm intended
"([a-zA-Z_][a-zA-Z0-9]*+)|"
// groups 5-7: decimal floating-point literals (exponent-only, leading-dot, trailing-dot forms)
"([0-9]+[eE][\\+\\-]?[0-9]+(?:[fFlL]?))|"
"([0-9]*[.][0-9]+(?:[eE][\\+\\-]?[0-9]+)?(?:[fFlL]?))|"
"([0-9]+[.][0-9]*(?:[eE][\\+\\-]?[0-9]+)?(?:[fFlL]?))|"
// groups 8-10: hexadecimal floating-point literals (0x prefix, p exponent)
"(0[xX][0-9a-fA-F]+[pP][\\+\\-]?[0-9]+(?:[fFlL]?))|"
"(0[xX][0-9a-fA-F]*[.][0-9a-fA-F]+(?:[pP][\\+\\-]?[0-9]+)?(?:[fFlL]?))|"
"(0[xX][0-9a-fA-F]+[.][0-9a-fA-F]*(?:[pP][\\+\\-]?[0-9]+)?(?:[fFlL]?))|"
// groups 11-14: integer literals -- hexadecimal, octal, binary, decimal
"(0[xX][0-9a-fA-F]+)|"
"(0[0-7]+)|"
"(0[bB][0-1]+)|"
"([0-9]++)|"
// group 15: double-quoted literal with backslash escapes
"(\"(?:[^\\\\\"]+|\\\\[\\s\\S])*+\")|"
// group 16: single-quoted literal with backslash escapes
"('(?:[^\\\\']+|\\\\[\\s\\S])*+')|"
// remaining alternatives: operators and punctuation, longer spellings listed before their prefixes
"(!=)|(!)|"
"(#)|($)|"
"(%=)|(%)|"
"(&&=)|(&&)|(&=)|(&)|"
"([(])|([)])|"
"([*]=)|([*])|"
"([+][+])|([+]=)|([+])|"
// NOTE(review): the comma is the only alternative without a capture group
"[,]|"
"(--)|(-=)|(->)|(-)|"
"([.][.][.])|([.][.])|([.])|"
"(/=)|(/)|"
"(::)|(:=)|(:)|"
"(;)|"
"(<<=)|(<<)|(<=>)|(<=)|(<>)|(<)|"
"(>>=)|(>=)|(>)|"
"(==)|(=)|"
// NOTE(review): repeats (>=), (>>=) and (>) from two lines up; since alternatives
// are tried in order, (>>) here looks unreachable behind the earlier (>) -- confirm
"(>=)|(>>=)|(>>)|(>)|"
"([?])|([@])|(\\[)|"
"(\\\\)|"
"(\\])|"
"(^=)|(^)|"
"([`])|([{])|"
"([|]=)|([|][|]=)|([|][|])|([|])|"
"([}])|"
"(~=)|(~)"
};
std::optional<lex_item> real_lexer(std::string_view v) noexcept {
auto m = ctre::starts_with<real_lexer_pattern>(v);
if (m) {
if (m.get<1>()) {
return lex_item{ type::space, m.view() };
}
else if (m.get<2>()) {
return lex_item{ type::space, m.view() };
}
else if (m.get<3>()) {
return lex_item{ type::space, m.view() };
}
else if (m.get<4>()) {
return lex_item{ type::identifier, m.view() };
}
else if (m.get<5>()) {
return lex_item{ type::flt, m.view() };
}
else if (m.get<6>()) {
return lex_item{ type::flt, m.view() };
}
else if (m.get<7>()) {
return lex_item{ type::flt, m.view() };
}
else if (m.get<8>()) {
return lex_item{ type::flt, m.view() };
}
else if (m.get<9>()) {
return lex_item{ type::flt, m.view() };
}
else if (m.get<10>()) {
return lex_item{ type::flt, m.view() };
}
else if (m.get<11>()) {
return lex_item{ type::number, m.view() };
}
else if (m.get<12>()) {
return lex_item{ type::number, m.view() };
}
else if (m.get<13>()) {
return lex_item{ type::number, m.view() };
}
else if (m.get<14>()) {
return lex_item{ type::number, m.view() };
}
else if (m.get<15>()) {
return lex_item{ type::str, m.view() };
}
else if (m.get<16>()) {
return lex_item{ type::chr, m.view() };
} else {
return lex_item{ type::ops, m.view() };
}
}
return std::nullopt;
}in couple of seconds with the vtable type approach. I'm currently upwards of half an hour waiting for the original to compile, not sure if it'll even finish. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Should supersede #158; this version actually extracts mutually exclusive paths.
E.g., in the lexer example, it will recognize that "([a-z]+)|([0-9]+)" has mutually exclusive paths and will split them apart from one another based on the first character. In theory this should reduce run times, but it still needs testing.